Package org.terrier.structures.indexing.singlepass

Source Code of org.terrier.structures.indexing.singlepass.BlockMemoryPostings

/*
* Terrier - Terabyte Retriever
* Webpage: http://terrier.org
* Contact: terrier{a.}dcs.gla.ac.uk
* University of Glasgow - School of Computing Science
* http://www.gla.ac.uk/
*
* The contents of this file are subject to the Mozilla Public License
* Version 1.1 (the "License"); you may not use this file except in
* compliance with the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS"
* basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
* the License for the specific language governing rights and limitations
* under the License.
*
* The Original Code is BlockMemoryPostings.java.
*
* The Original Code is Copyright (C) 2004-2011 the University of Glasgow.
* All Rights Reserved.
*
* Contributor(s):
*   Roi Blanco (rblanc{at}@udc.es)
*   Craig Macdonald (craigm{at}dcs.gla.ac.uk)
*/
package org.terrier.structures.indexing.singlepass;

import java.io.IOException;

import org.terrier.structures.indexing.BlockDocumentPostingList;
import org.terrier.structures.indexing.DocumentPostingList;

/**
* Class for handling posting lists containing block information in memory while indexing.
* @author Roi Blanco
*/
public class BlockMemoryPostings extends MemoryPostings{

  /**    
   * Add the terms in a DocumentPostingList to the postings in memory.   
   * @param _docPostings BlockDocumentPostingList containing the posting information for one document.
   * @param docid Document id of the indexed document
   * @throws IOException if an I/O error occurs.   
   */
  public void addTerms(DocumentPostingList _docPostings, int docid) throws IOException {    
    BlockDocumentPostingList docPostings = (BlockDocumentPostingList_docPostings;   
    for (String term : docPostings.termSet())   
      add(term, docid, docPostings.getFrequency(term), docPostings.getBlocks(term));   
  }

  /** Add the specified to the term posting to the memory postings.
   * @param term The String form of the term
   * @param doc the document id of the doc
   * @param frequency the frequency of the specified term in the document
   * @param blocks the blockids at which this term occurs
   */
  public void add(String term, int doc, int frequency, int[] blocks) throws IOException{
    BlockPosting post;
   
    if((post =(BlockPosting) postings.get(term)) != null) {           
      valueBytes += post.insert(doc, frequency, blocks);
      int tf = post.getTF();
      // Update the max size
      if(maxSize < tf) maxSize = tf;
    }
    else{
      post = new BlockPosting();
      valueBytes += post.writeFirstDoc(doc, frequency, blocks);     
      postings.put(term,post);
      keyBytes += (long)(12 + 2*term.length());
    }
    numPointers++;
 
TOP

Related Classes of org.terrier.structures.indexing.singlepass.BlockMemoryPostings

TOP
Copyright © 2018 www.massapi.com. All rights reserved.
All source code are property of their respective owners. Java is a trademark of Sun Microsystems, Inc and owned by ORACLE Inc. Contact coftware#gmail.com.